Fragestellung: „Die Mannschaft, die zur Halbzeit vorne liegt,
gewinnt mit einer Wahrscheinlichkeit von mindestens 75 % auch das Spiel. Steht es zur
Halbzeit unentschieden, gewinnt eher das Heimteam.“
Dafür verwenden wir den DataCamp-Datensatz „Soccer Data“.
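Als Einführung werden wir auf DataCamp folgende Kurse durchgehen:
- Interactive Data Visualization with plotly
- Intermediate Interactive Data Visualization with plotly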
# Bibliotheken importieren
library("plotly")
library("dplyr")
Attaching package: ‘dplyr’
The following objects are masked from ‘package:plyr’:
arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library("plyr")
# Collect the CSV files in the Data folder (full paths, so read.csv can open
# them regardless of the working directory's sub-structure). Restricting the
# pattern to *.csv keeps any other file in the folder out of the import.
files <- list.files(
  path = "./Data/",
  pattern = "\\.csv$",
  full.names = TRUE,
  ignore.case = TRUE
)

# Fail early with a clear message instead of continuing with an empty frame.
if (length(files) == 0) {
  stop("No CSV files found in ./Data/", call. = FALSE)
}

# Read every CSV and row-bind the results into one data frame
# (original note: the files cover the seasons 2015-2019).
df <- plyr::ldply(.data = files, .fun = read.csv)

# Open the data viewer only in an interactive session; it is a pure
# inspection aid and not needed when the notebook is rendered in batch mode.
if (interactive()) {
  View(df)
}
# Result codes used in the HTR (half-time result) and FTR (full-time result)
# columns, mapped to the labels shown on the x-axis.
result_labels <- c(A = "Away", D = "Draw", H = "Home")

# Frequency of each result code at half time and at full time.
# dplyr::count is called explicitly: plyr is attached after dplyr in this
# notebook and exports its own, incompatible count().
htr_table <- df %>%
  dplyr::count(HTR)

ftr_table <- df %>%
  dplyr::count(FTR)

# Look the counts up by result code instead of relying on row position, so a
# missing code or an extra NA row cannot shift the labels onto the wrong bar.
Results <- unname(result_labels)
HT_count <- htr_table$n[match(names(result_labels), htr_table$HTR)]
FT_count <- ftr_table$n[match(names(result_labels), ftr_table$FTR)]

# A code that never occurs has no row in the count table; plot it as zero.
HT_count[is.na(HT_count)] <- 0L
FT_count[is.na(FT_count)] <- 0L

df_results <- data.frame(Results, HT_count, FT_count)

# Grouped bar chart: one bar pair (half time / full time) per result.
fig <- plot_ly(
  df_results,
  x = ~Results,
  y = ~HT_count,
  type = "bar",
  name = "Half Time Score"
) %>%
  add_trace(y = ~FT_count, name = "Full Time Score") %>%
  layout(yaxis = list(title = "Count"), barmode = "group")

fig
NA
# Combine half-time and full-time result into a single "HTR FTR" label
# (e.g. "D H"), describing how the game progressed.
df$result <- paste(df$HTR, df$FTR)

# Share of each half-time/full-time combination among all matches, in percent.
# The dplyr verbs are namespace-qualified: plyr is attached after dplyr in
# this notebook and its summarise() would ignore the grouping.
df_count_results <- df %>%
  dplyr::group_by(result) %>%
  dplyr::summarise(count_result = dplyr::n() / nrow(df) * 100)

# Bar chart of the percentages per game progress.
df_count_results %>%
  plot_ly(x = ~reorder(result, count_result), y = ~count_result) %>%
  add_bars() %>%
  layout(
    xaxis = list(categoryorder = "total descending", title = "Game Progress"),
    yaxis = list(title = "Probability"),
    title = "What is the probability of a game progress?"
  )
NA
# Percentage of rows in `df2` relative to the rows in `df1`.
#
# Arguments:
#   df1  data frame forming the base population (denominator).
#   df2  data frame with the cases of interest (numerator); in this notebook
#        it is always a filtered subset of `df1`.
#
# Returns:
#   A numeric scalar, 100 / nrow(df1) * nrow(df2). If `df1` has no rows the
#   percentage is undefined and NA_real_ is returned instead of NaN.
calc_prob <- function(df1, df2) {
  total <- nrow(df1)
  if (total == 0) {
    return(NA_real_)
  }
  # Last expression is the value itself (not an assignment), so the result
  # is returned visibly and prints when the function is called directly.
  100 / total * nrow(df2)
}
# Split all matches by the half-time result (HTR):
# "H" = home team ahead, "A" = away team ahead, "D" = level.
df_halftime_home <- dplyr::filter(df, HTR == "H")
df_halftime_away <- dplyr::filter(df, HTR == "A")
df_halftime_draw <- dplyr::filter(df, HTR == "D")

# Within each half-time group, keep the matches whose full-time result (FTR)
# is the same as the half-time result.
df_fulltime_home_win <- dplyr::filter(df_halftime_home, FTR == "H")
df_fulltime_away_win <- dplyr::filter(df_halftime_away, FTR == "A")
df_fulltime_draw <- dplyr::filter(df_halftime_draw, FTR == "D")
# Home team leads at half time and wins the match:
# percentage of half-time home leads that ended as home wins.
home_win_prob <- calc_prob(df1 = df_halftime_home, df2 = df_fulltime_home_win)
print(home_win_prob)
#> [1] 82.54902
# Absolute number of those matches.
print(nrow(df_fulltime_home_win))
#> [1] 421
# Away team leads at half time and wins the match:
# percentage of half-time away leads that ended as away wins.
away_win_prob <- calc_prob(df1 = df_halftime_away, df2 = df_fulltime_away_win)
print(away_win_prob)
#> [1] 72.03166
# Absolute number of those matches.
print(nrow(df_fulltime_away_win))
#> [1] 273
# Level at half time and still level at full time:
# percentage of half-time draws that also ended as draws.
draw_prob <- calc_prob(df_halftime_draw, df_fulltime_draw)
print(draw_prob)
#> [1] 36.45008
# Absolute number of those matches.
print(nrow(df_fulltime_draw))
#> [1] 230

# Second half of the research question: after a half-time draw, does the
# home team win more often than the away team? Compare both percentages
# over the same base population (all matches level at half time).
df_draw_then_home_win <- dplyr::filter(df_halftime_draw, FTR == "H")
df_draw_then_away_win <- dplyr::filter(df_halftime_draw, FTR == "A")

draw_home_win_prob <- calc_prob(df_halftime_draw, df_draw_then_home_win)
draw_away_win_prob <- calc_prob(df_halftime_draw, df_draw_then_away_win)
print(draw_home_win_prob)
print(draw_away_win_prob)
LS0tCnRpdGxlOiAiRGF0YXZpeiBtaXQgUGxvdGx5IFBMIERhdGEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIEZyYWdlc3RlbGx1bmc6ICJEaWUgTWFuc2NoYWZmdCwgZGllIHp1ciBIYWxiemVpdCB2b3JuZSBsaWVndCwgZ2V3aW5udCBtaXQgZWluZXIgQ2hhbmNlIHZvbiBtaW5kZXN0ZW5zIDc1JSBhdWNoIGRhcyBTcGllbC4gRmFsbHMgenVyIEhhbGJ6ZWl0IHVuZW50c2NoaWVkZW4gaXN0LCBnZXdpbm50IGVoZXIgZGFzIEhlaW10ZWFtLiIKCgpEYWbDvHIgbmVobWVuIHdpciBkZW4gRGF0YWNhbXAgRGF0ZW5zYXR6IFtTb2NjZXIgRGF0YV0oaHR0cHM6Ly9hcHAuZGF0YWNhbXAuY29tL3dvcmtzcGFjZS9kYXRhc2V0cy9kYXRhc2V0LXB5dGhvbi1zb2NjZXIpCgpBbHMgRWluZsO8aHJ1bmcgd2VyZGVuIHdpciBhdWYgRGF0YWNhbXAgZm9sZ2VuZGUgS3Vyc2UgZHVyY2hnZWhlbjoKCi0gW0ludGVyYWN0aXZlIERhdGEgVmlzdWFsaXphdGlvbiB3aXRoIHBsb3RseV0oaHR0cHM6Ly9hcHAuZGF0YWNhbXAuY29tL2xlYXJuL2NvdXJzZXMvaW50ZXJhY3RpdmUtZGF0YS12aXN1YWxpemF0aW9uLXdpdGgtcGxvdGx5LWluLXIpCgotIFtJbnRlcm1lZGlhdGUgSW50ZXJhY3RpdmUgRGF0YSBWaXN1YWxpemF0aW9uIHdpdGggcGxvdGx5XShodHRwczovL2FwcC5kYXRhY2FtcC5jb20vbGVhcm4vY291cnNlcy9pbnRlcmFjdGl2ZS1kYXRhLXZpc3VhbGl6YXRpb24td2l0aC1wbG90bHktaW4tcikKCmBgYHtyfQojIEJpYmxpb3RoZWtlbiBpbXBvcnRpZXJlbgpsaWJyYXJ5KCJwbG90bHkiKQpsaWJyYXJ5KCJkcGx5ciIpCmxpYnJhcnkoInBseXIiKQpgYGAKCmBgYHtyfQojIExpc3QgZmlsZXMgaW4gRGF0YSBmb2xkZXIKZmlsZXMgPC0gbGlzdC5maWxlcyhwYXRoPSIuL0RhdGEvIiwgcGF0dGVybj1OVUxMLCBhbGwuZmlsZXM9RkFMU0UsIGZ1bGwubmFtZXM9VFJVRSkKCiMgQ3JlYXRlIERhdGFGcmFtZSB3aXRoIGFsbCBjc3YgZnJvbSAyMDE1LTIwMTkKZGYgPC0gbGRwbHkoLmRhdGEgPSBmaWxlcywgLmZ1biA9IHJlYWQuY3N2KQoKIyBWaWV3IGVudGlyZSBEYXRhRnJhbWUgaW4gUiBTdHVkaW8KVmlldyhkZikKCmBgYAoKYGBge3J9Cmh0cl90YWJsZSA8LSBkZiAlPiUKCWNvdW50KEhUUikKCmZ0cl90YWJsZSA8LSBkZiAlPiUKCWNvdW50KEZUUikKClJlc3VsdHMgPSBjKCJBd2F5IiwgIkRyYXciLCAiSG9tZSIpCkhUX2NvdW50IDwtIGMoaHRyX3RhYmxlJG4pCkZUX2NvdW50IDwtIGMoZnRyX3RhYmxlJG4pCgpkZl9yZXN1bHRzIDwtIGRhdGEuZnJhbWUoUmVzdWx0cywgSFRfY291bnQsIEZUX2NvdW50KQoKZmlnIDwtIHBsb3RfbHkoCiAgZGZfcmVzdWx0cywgeCA9IH5SZXN1bHRzLCB5ID0gfkhUX2NvdW50LCB0eXBlID0gJ2JhcicsIG5hbWUgPSAnSGFsZiBUaW1lIFNjb3JlJykgJT4lIAogIGFkZF90cmFjZSh5ID0gfkZUX2NvdW50LCBuYW1lID0gJ0Z1bGwgVGltZSBTY29yZScpICU+JQogIGxheW91dCh5YXhpcyA9IGxpc3QodGl0bGUgPSAn
Q291bnQnKSwgYmFybW9kZSA9ICdncm91cCcpCgpmaWcKCmBgYAoKYGBge3J9CiMgSFRSICYgRlRSIGluIGVpbmVyIFNwYWx0ZSB6dXNhbW1lbmbDvGdlbgoKZGYkcmVzdWx0IDwtIHBhc3RlKGRmJEhUUiwgZGYkRlRSKQoKZGZfY291bnRfcmVzdWx0cyA8LSBkZiAlPiUgCiAgZ3JvdXBfYnkocmVzdWx0KSAlPiUgCiAgc3VtbWFyaXNlKGNvdW50X3Jlc3VsdCA9IG4oKSAvIG5yb3coZGYpICogMTAwKQoKZGZfY291bnRfcmVzdWx0cyAlPiUKICBwbG90X2x5KHggPSB+cmVvcmRlcihyZXN1bHQsIGNvdW50X3Jlc3VsdCksIHkgPSB+Y291bnRfcmVzdWx0KSAlPiUKICBhZGRfYmFycygpICU+JQogIGxheW91dCh4YXhpcyA9IGxpc3QoY2F0ZWdvcnlvcmRlciA9ICJ0b3RhbCBkZXNjZW5kaW5nIiwgdGl0bGUgPSAiR2FtZSBQcm9ncmVzcyIpLAogICAgICAgICB5YXhpcyA9IGxpc3QodGl0bGUgPSAiUHJvYmFiaWxpdHkiKSwKICAgICAgICAgdGl0bGUgPSAiV2hhdCBpcyB0aGUgcHJvYmFiaWxpdHkgb2YgYSBnYW1lIHByb2dyZXNzPyIpCgpgYGAKYGBge3J9CmNhbGNfcHJvYiA8LSBmdW5jdGlvbihkZjEsIGRmMikgewogIHdpbl9wcm9iIDwtIDEwMCAvIG5yb3coZGYxKSAqIG5yb3coZGYyKQp9CmBgYAoKYGBge3J9CmRmX2hhbGZ0aW1lX2hvbWUgPC0gZGYgJT4lIGZpbHRlcihIVFIgPT0gIkgiKQpkZl9oYWxmdGltZV9hd2F5IDwtIGRmICU+JSBmaWx0ZXIoSFRSID09ICJBIikKZGZfaGFsZnRpbWVfZHJhdyA8LSBkZiAlPiUgZmlsdGVyKEhUUiA9PSAiRCIpCgpkZl9mdWxsdGltZV9ob21lX3dpbiA8LSBkZl9oYWxmdGltZV9ob21lICU+JSBmaWx0ZXIoRlRSID09ICJIIikKZGZfZnVsbHRpbWVfYXdheV93aW4gPC0gZGZfaGFsZnRpbWVfYXdheSAlPiUgZmlsdGVyKEZUUiA9PSAiQSIpCmRmX2Z1bGx0aW1lX2RyYXcgPC0gZGZfaGFsZnRpbWVfZHJhdyAlPiUgZmlsdGVyKEZUUiA9PSAiRCIpCgojIEhlaW10ZWFtIGbDvGhydCB6dXIgSGFsYnplaXQgdW5kIGdld2lubnQgZGFzIFNwaWVsCmhvbWVfd2luX3Byb2IgPC0gY2FsY19wcm9iKGRmX2hhbGZ0aW1lX2hvbWUsIGRmX2Z1bGx0aW1lX2hvbWVfd2luKQpwcmludChob21lX3dpbl9wcm9iKQpwcmludChucm93KGRmX2Z1bGx0aW1lX2hvbWVfd2luKSkKCiMgQXVzd8OkcnRzdGVhbSBmw7xocnQgenVyIEhhbGJ6ZWl0IHVuZCBnZXdpbm50IGRhcyBTcGllbAphd2F5X3dpbl9wcm9iIDwtIGNhbGNfcHJvYihkZl9oYWxmdGltZV9hd2F5LCBkZl9mdWxsdGltZV9hd2F5X3dpbikKcHJpbnQoYXdheV93aW5fcHJvYikKcHJpbnQobnJvdyhkZl9mdWxsdGltZV9hd2F5X3dpbikpCgojIFVuZW50c2NoaWVkZW4genVyIEhhbGJ6ZWl0IHVuZCBhdWNoIGFtIEVuZGUgZGVzIFNwaWVscwpkcmF3X3Byb2IgPC0gY2FsY19wcm9iKGRmX2hhbGZ0aW1lX2RyYXcsIGRmX2Z1bGx0aW1lX2RyYXcpCnByaW50KGRyYXdfcHJvYikKcHJpbnQobnJvdyhkZl9mdWxsdGltZV9kcmF3KSkKCmBgYApgYGB7cn0KCmBgYAoK